# ===============================================================#
#                     Replication files for:                     #
#   "Attitudinal and Behavioral Legacies of Wartime Violence:    #
#                      A Meta-Analysis"                          #
#                        Joan Barceló                            #
#               American Political Science Review                #
#               Last update: September 3, 2025                   #
# ===============================================================#

#################################
# Figure E.2: Residuals from regression predicting the number of studies based on conflict characteristics
################################

## ------------------------ Load Packages ------------------------

library(dplyr)
library(ggplot2)
library(ggrepel)

# Load the analysis dataset from a Stata .dta file. The path starts with "~",
# so the file is looked up under the user's home directory.
model_data <- haven::read_dta("~/Auxiliary/model_data.dta")

# Refit logged model (best fit) on conflict fundamentals.
# Response: log(1 + num_studies). Predictors: type_of_conflict (entered as a
# factor), conflict_duration, log(total_bd_best_10k), and 2023 - year_last.
# NOTE(review): total_bd_best_10k presumably measures battle deaths in units of
# 10,000 and year_last the final year of the conflict -- confirm in the codebook.
# NOTE(review): log() returns -Inf for zero values, which lm() rejects --
# confirm total_bd_best_10k is strictly positive.
ols_model <- lm(
  log1p(num_studies) ~ factor(type_of_conflict) + conflict_duration +
    log(total_bd_best_10k) + I(2023 - year_last),
  data = model_data
)
# Coefficient table and fit statistics. As a bare top-level expression this is
# auto-printed under Rscript / interactive use, but not by a plain source().
summary(ols_model)

# Generate fitted values and residuals for every row of model_data.
# Passing newdata keeps the result aligned row-for-row with model_data: rows
# that lm() dropped because of missing covariates receive NA, instead of the
# fitted vector being shorter than the data (which makes the assignment fail).
# The column keeps its existing name `predicted_prob`, although it holds fitted
# values on the log1p(num_studies) scale rather than probabilities.
model_data$predicted_prob <- predict(ols_model, newdata = model_data)

# Residual on the model's response scale: observed log1p(num_studies) minus the
# fitted value (log1p matches the transformation used in the model formula).
model_data$residuals <- log1p(model_data$num_studies) - model_data$predicted_prob

# Order conflicts from the largest positive residual down to the largest
# negative one (signed residuals, not absolute values; NA residuals sort last).
# observation_id stores that rank as a factor with reversed levels, so rank 1 is
# the last level and the discrete x-axis runs from the most negative residual
# (left) to the most positive residual (right).
model_data <- model_data %>%
  arrange(desc(residuals)) %>%
  mutate(observation_id = factor(row_number(), levels = rev(row_number())))

# Display names for the conflicts annotated in the figure, as a character
# vector named by conflict ID. Names are strings so that a lookup with
# as.character(conflict_id) matches by name rather than by position.
# Each row of the matrix below is one (conflict ID, label) pair.
custom_labels <- local({
  id_label_pairs <- matrix(
    c(
      "315",   "The Troubles (1968–1998)",
      "386",   "Algerian Civil War (1991–2002)",
      "337",   "Somali Civil War (1982–present)",
      "308",   "Moro Conflict in Mindanao (1970–2020)",
      "227",   "India Maoist/Naxalite Conflict (1948–present)",
      "209",   "Philippine Communist Insurgency (1946–present)",
      "289",   "Colombian Armed Conflict (1964–present)",
      "234",   "Israeli–Palestinian Conflict (1948–present)",
      "390",   "Croatian War of Independence (1991–1995)",
      "412",   "Kosovo War (1998–1999)",
      "13306", "Russo-Ukrainian Conflict (2014–present)",
      "327",   "Angolan Civil War (1975–2002)"
    ),
    ncol = 2,
    byrow = TRUE
  )
  # Column 1 holds the IDs (names), column 2 the labels (values).
  setNames(id_label_pairs[, 2], id_label_pairs[, 1])
})

# Residuals plot: one bar per conflict, ordered by residual, coloured by sign,
# with the six largest and six smallest residuals annotated by conflict name.
Figure_E2 <- ggplot(model_data, aes(x = observation_id, y = residuals)) +
  # geom_col() uses the y values as bar heights (same as geom_bar(stat = "identity"))
  geom_col(aes(fill = residuals > 0)) +
  # guide = "none" hides the legend; the older guide = FALSE form is deprecated
  scale_fill_manual(
    values = c("TRUE" = "steelblue", "FALSE" = "salmon"),
    guide = "none"
  ) +
  geom_hline(yintercept = 0, color = "black", linewidth = 0.5) +

  # Labels for the six largest residuals, looked up by conflict ID.
  # A fixed seed makes ggrepel's label placement identical across runs
  # (its default, seed = NA, leaves the placement random).
  geom_text_repel(
    data = model_data %>% top_n(6, residuals),
    aes(label = custom_labels[as.character(conflict_id)]),
    nudge_y = 0.7,
    direction = "y",
    hjust = 1.1,
    segment.color = "gray90",
    segment.size = 0.4,
    size = 3,
    color = "black",
    seed = 1234
  ) +

  # Labels for the six smallest (most negative) residuals; same fixed seed
  geom_text_repel(
    data = model_data %>% top_n(-6, residuals),
    aes(label = custom_labels[as.character(conflict_id)]),
    nudge_y = -2.3,
    direction = "y",
    hjust = 0,
    segment.color = "gray90",
    segment.size = 0.4,
    size = 3,
    color = "black",
    seed = 1234
  ) +

  # Region captions anchored at the right edge of the panel (x = Inf); the
  # large hjust values shift the text leftwards into the plotting area.
  ggplot2::annotate("text", x = Inf, y = 2.5, label = "Overrepresented", color = "steelblue",
                    hjust = 3.75, vjust = 1.5, size = 4, fontface = "bold") +
  ggplot2::annotate("text", x = Inf, y = -2.5, label = "Underrepresented", color = "salmon",
                    hjust = 3.5, vjust = 1.5, size = 4, fontface = "bold") +

  labs(x = "",
       y = "Residuals",
       title = "",
       subtitle = "") +

  # Minimal theme without x-axis text/ticks (one unlabeled bar per conflict) or
  # grid lines; the wider right margin leaves room for the repelled labels.
  theme_minimal() +
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    panel.grid = element_blank(),
    plot.margin = margin(1, 3, 1, 1, "cm")
  )

# Explicit print so the figure is drawn even when the script is source()d
print(Figure_E2)
